import pandas as pd
import matplotlib.pyplot as plt
# Load the training data; the path is resolved against the current working directory.
df = pd.read_csv('train.csv')

# Each 'datetime' value is handled as "<date> <time>" text: the date part is
# split on '-' and the time part on ':'.
timestamps = df['datetime']
date_fields = timestamps.map(lambda stamp: stamp.split()[0].split('-'))

# month and day are kept as the raw substrings; only hour is converted to int.
df['month'] = date_fields.map(lambda fields: fields[1])
df['day'] = date_fields.map(lambda fields: fields[2])
df['hour'] = timestamps.map(lambda stamp: int(stamp.split()[1].split(':')[0]))
print(df.head())

# Line chart of the summed 'count' for each hour value.
hourly_totals = df.groupby('hour')['count'].sum()
hourly_totals.plot(kind='line')
plt.show()

def a(x):
    """Classify an hour value into one of four time-of-day labels.

    Args:
        x: Hour value; it is compared numerically against the band edges.

    Returns:
        '早高峰' (morning peak) when 7 <= x < 10,
        '白天低谷' (daytime trough) when 10 <= x < 17,
        '晚高峰' (evening peak) when 17 <= x < 22,
        '晚上低谷' (night trough) for every other value.
    """
    if 7 <= x < 10:
        return '早高峰'
    if 10 <= x < 17:
        return '白天低谷'
    if 17 <= x < 22:
        return '晚高峰'
    # Anything outside the 7-22 range falls through to the night label.
    return '晚上低谷'

# Label every row with the time-of-day band that `a` returns for its hour.
df['hour_section'] = df['hour'].apply(a)
print(df.head())

import seaborn as sns

# Box plot of the 'count' column, drawn before the three-sigma filtering below.
sns.boxplot(data=df, y='count')
plt.show()

# Three-sigma rule on 'count': flag rows whose distance from the mean exceeds
# 3 standard deviations, report how many were flagged, and keep the rest.
miu = df['count'].mean()
sigma = df['count'].std()

# Build the mask once and use its complement for the kept rows, so every row
# lands in exactly one of the two sets. With two independent strict
# comparisons (> and <), rows exactly on the threshold were lost, and a zero
# or NaN sigma (constant column, single row) emptied the whole frame.
# Rows with a missing 'count' are not flagged and therefore are kept.
is_outlier = (df['count'] - miu).abs() > 3 * sigma
noise = df[is_outlier]

# .copy() makes the kept rows an independent frame, so adding columns to df
# afterwards does not raise SettingWithCopyWarning.
nonoise = df[~is_outlier].copy()
print(len(noise))
df = nonoise
